import requests
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import folium
from folium.plugins import MarkerCluster
import ipywidgets as widgets
from ipywidgets import interact
import plotly.express as px
# First attempt: enable plotly's offline notebook mode so figures render inline.
# NOTE(review): per the plotly docs, connected=True loads plotly.js from a CDN
# rather than embedding it in the notebook - confirm this suits offline use.
import plotly
plotly.offline.init_notebook_mode(connected=True)
# Fallback if figures still do not render: wrap the plotly.js source returned
# by get_plotlyjs() in an IPython Javascript display object.
from IPython.display import Javascript
from plotly.offline import get_plotlyjs
Javascript(get_plotlyjs())
# !curl "https://opendata.tpg.ch/api/explore/v2.1/catalog/datasets/montees-par-arret-par-ligne/exports/parquet?lang=fr&timezone=Europe%2FBerlin" >> "../data/montees-par-arret-par-ligne.parquet"
# !curl "https://opendata.tpg.ch/api/explore/v2.1/catalog/datasets/montees-par-arret-par-ligne/exports/json?lang=fr&timezone=Europe%2FBerlin" >> "../data/montees-par-arret-par-ligne.json"
# !curl "https://opendata.tpg.ch/api/explore/v2.1/catalog/datasets/arrets/exports/json?lang=fr&timezone=Europe%2FBerlin" >> "../data/arrets.json"
# Read the stops dataset (JSON export) into a DataFrame and preview the first rows.
data_arrets = pd.read_json(path_or_buf='../data/arrets.json')
data_arrets.head(n=5)
| arretcodelong | nomarret | commune | pays | codedidoc | coordonnees | actif | |
|---|---|---|---|---|---|---|---|
| 0 | _BADNF | Bardonnex Douane - F | SAINT-JULIEN-EN-GENEVOIS | FR | NaN | {'lon': 6.096618, 'lat': 46.142014} | N |
| 1 | _CANDF | Bois Candide-Dne - F | FERNEY-VOLTAIRE | FR | NaN | {'lon': 6.092343, 'lat': 46.243755} | Y |
| 2 | _CZDNF | Croix-de-Rozon-Dne - F | COLLONGES-SOUS-SALÈVE | FR | NaN | {'lon': 6.137984, 'lat': 46.143688} | N |
| 3 | _DOSOF | Soral-Dne - F | VIRY | FR | NaN | {'lon': 6.03604, 'lat': 46.136703} | Y |
| 4 | _GSDNS | Grand-Saconnex-Dne - CH | LE GRAND-SACONNEX | CH | NaN | {'lon': 6.120933, 'lat': 46.24839} | Y |
# Flatten the nested ``coordonnees`` dicts ({'lon': ..., 'lat': ...}) into two
# scalar columns. A missing coordinate may be loaded as None or as NaN, and
# subscripting NaN raises TypeError, so only genuine dicts are unpacked.
data_arrets["coordonnees_lon"] = data_arrets["coordonnees"].apply(
    lambda coord: coord["lon"] if isinstance(coord, dict) else None
)
data_arrets["coordonnees_lat"] = data_arrets["coordonnees"].apply(
    lambda coord: coord["lat"] if isinstance(coord, dict) else None
)
data_arrets.drop(columns="coordonnees", inplace=True)
# Replace the "Y"/"N" activity flag with booleans. Bracket assignment is used
# because attribute-style assignment silently creates a plain attribute when
# the column does not already exist.
data_arrets["actif"] = data_arrets["actif"].map({"Y": True, "N": False})
data_arrets
| arretcodelong | nomarret | commune | pays | codedidoc | actif | coordonnees_lon | coordonnees_lat | |
|---|---|---|---|---|---|---|---|---|
| 0 | _BADNF | Bardonnex Douane - F | SAINT-JULIEN-EN-GENEVOIS | FR | NaN | False | 6.096618 | 46.142014 |
| 1 | _CANDF | Bois Candide-Dne - F | FERNEY-VOLTAIRE | FR | NaN | True | 6.092343 | 46.243755 |
| 2 | _CZDNF | Croix-de-Rozon-Dne - F | COLLONGES-SOUS-SALÈVE | FR | NaN | False | 6.137984 | 46.143688 |
| 3 | _DOSOF | Soral-Dne - F | VIRY | FR | NaN | True | 6.036040 | 46.136703 |
| 4 | _GSDNS | Grand-Saconnex-Dne - CH | LE GRAND-SACONNEX | CH | NaN | True | 6.120933 | 46.248390 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 4396 | ZIPL03 | ZIPLO | PLAN-LES-OUATES | CH | 8593073.0 | True | 6.101779 | 46.165997 |
| 4397 | ZIPL99 | ZIPLO | PLAN-LES-OUATES | CH | 8593073.0 | False | 6.103465 | 46.167000 |
| 4398 | ZIPLO02 | None | None | CH | NaN | False | NaN | NaN |
| 4399 | ZMON01 | ZI de Montréal | VILLE-LA-GRAND | FR | 8595870.0 | False | 6.277687 | 46.203590 |
| 4400 | ZOLA00 | Émile Zola | ANNEMASSE | FR | 8595778.0 | False | 6.231863 | 46.196251 |
4401 rows × 8 columns
# Fill missing entries with NaN so every absent value uses a single marker,
# then print the column summary (dtypes, non-null counts, memory usage).
data_arrets = data_arrets.fillna(np.nan)
data_arrets.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 4401 entries, 0 to 4400 Data columns (total 8 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 arretcodelong 4401 non-null object 1 nomarret 4382 non-null object 2 commune 4326 non-null object 3 pays 4401 non-null object 4 codedidoc 3746 non-null float64 5 actif 4401 non-null bool 6 coordonnees_lon 4253 non-null float64 7 coordonnees_lat 4253 non-null float64 dtypes: bool(1), float64(3), object(4) memory usage: 245.1+ KB
# Count the missing values in each column of the stops table.
data_arrets.isna().sum()
arretcodelong 0 nomarret 19 commune 75 pays 0 codedidoc 655 actif 0 coordonnees_lon 148 coordonnees_lat 148 dtype: int64
# Count rows that exactly repeat an earlier row of the stops table.
data_arrets.duplicated(keep="first").sum()
0
# Keep only the stops whose ``actif`` flag is True.
data_arrets_actif = data_arrets.loc[data_arrets["actif"].eq(True)]
def draw_map_tpg(coordonnees_centre, zoom_start=12, arrets=None):
    """Build a folium map with one clustered marker per stop.

    Args:
        coordonnees_centre: ``[lat, lon]`` pair the map is centred on.
        zoom_start: Initial zoom level of the map.
        arrets: Optional DataFrame of stops providing the columns
            ``nomarret``, ``commune``, ``pays``, ``arretcodelong``,
            ``coordonnees_lat`` and ``coordonnees_lon``. When omitted, the
            module-level ``data_arrets_actif`` is used, so existing calls
            behave exactly as before.

    Returns:
        The ``folium.Map`` with every located stop added to a
        ``MarkerCluster`` layer. Stops in Switzerland (``pays == 'CH'``) get
        a red icon, all others a blue one.
    """
    if arrets is None:
        arrets = data_arrets_actif

    m = folium.Map(location=coordonnees_centre, zoom_start=zoom_start, min_zoom=9, control_scale=True)
    marker_cluster = MarkerCluster(name='Arrêts TPG').add_to(m)
    # folium.LayerControl().add_to(m)

    # Stops without coordinates cannot be placed on the map: drop them once
    # up front instead of testing every row inside the loop.
    located = arrets.dropna(subset=['coordonnees_lon', 'coordonnees_lat'])

    # itertuples reads each row a single time; the previous index loop built
    # a fresh row Series for every individual field access.
    for stop in located.itertuples(index=False):
        popup_html = (
            "<b>Nom :</b> {}<br>".format(stop.nomarret)
            + "<b>Commune :</b> {}<br>".format(stop.commune)
            + "<b>Pays :</b> {}<br>".format(stop.pays)
            + "<b>Code Arret :</b> {}<br>".format(stop.arretcodelong)
        )
        marker_color = 'red' if stop.pays == 'CH' else 'blue'
        folium.Marker(
            location=[stop.coordonnees_lat, stop.coordonnees_lon],
            tooltip=stop.nomarret,
            icon=folium.Icon(color=marker_color, icon="bus", prefix="fa"),
            popup=folium.Popup(popup_html, max_width=300),
        ).add_to(marker_cluster)
    return m
# Centre the overview map on the mean latitude/longitude of the active stops.
coordonnees_centre = [
    data_arrets_actif[axis].mean()
    for axis in ("coordonnees_lat", "coordonnees_lon")
]
m = draw_map_tpg(coordonnees_centre)
m
# Also write the map to a standalone HTML file.
outfp = "../outputs/map_arrets.html"
m.save(outfp)
def geocode(address):
    """Look up *address* with the Nominatim search API and return the first hit.

    Prints the display name of the match, or a "not found" message.

    Args:
        address: Free-form address or place name to search for.

    Returns:
        The first result dict of the JSON response (the caller reads its
        ``lat``, ``lon`` and ``display_name`` entries), or ``None`` when the
        server does not answer with HTTP 200 or returns no result.

    Raises:
        requests.RequestException: If the request fails or times out.
    """
    params = {"q": address, "format": "json"}
    # Nominatim's usage policy asks for an identifying User-Agent (stock
    # HTTP-library agents may be rejected), and the timeout stops a stalled
    # connection from blocking the notebook kernel forever.
    response = requests.get(
        "https://nominatim.openstreetmap.org/search",
        params=params,
        headers={"User-Agent": "tpg-arrets-notebook"},
        timeout=10,
    )
    if response.status_code == 200:
        places = response.json()
        if places:
            place = places[0]
            print(f"Adresse trouvée: {place['display_name']}")
            return place
    print("Pas d'adresse trouvée")
    return None
# Ask for an address and centre a zoomed-in map on it; fall back to Geneva
# when the address cannot be geocoded.
address = input("Entrez l'adresse choisie > ")
place = geocode(address)
if place is None:
    place = geocode('Genève')
# Nominatim returns lat/lon as strings; convert them so the centre is a
# numeric [lat, lon] pair like the one used for the overview map.
coordonnees_centre = [float(place['lat']), float(place['lon'])]
m = draw_map_tpg(coordonnees_centre, zoom_start=16)
folium.Marker(
    location=coordonnees_centre,
    # Not every response carries a non-empty 'name'; fall back to the full
    # display name instead of raising KeyError.
    tooltip=place.get('name') or place['display_name'],
    popup=folium.Popup(place['display_name'], max_width=300)
).add_to(m)
m
Pas d'adresse trouvée Adresse trouvée: Genève, Schweiz/Suisse/Svizzera/Svizra
# Read the boardings-per-stop-per-line dataset (Parquet export) and preview it.
data_montees = pd.read_parquet(path='../data/montees-par-arret-par-ligne.parquet')
data_montees.head(n=5)
| date | ligne | ligne_type_act | jour_semaine | horaire_type | arret | arret_code_long | indice_semaine | indice_jour_semaine | nb_de_montees | nb_de_descentes | mois_annee | coordonnees | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2023-03-20 | 55 | SECONDAIRE | 1-Lundi | NORMAL | Vireloup | VRLP01 | 12 | 1 | 9.63 | 0.00 | 2023-03 | b'\x01\x01\x00\x00\x00\xfbw}\xe6\xac\x7f\x18@w... |
| 1 | 2023-03-20 | 56 | SECONDAIRE | 1-Lundi | NORMAL | Bergère | BRGE01 | 12 | 1 | 16.37 | 1.00 | 2023-03 | b'\x01\x01\x00\x00\x00dw\x81\x92\x02;\x18@\xc0... |
| 2 | 2023-03-20 | 56 | SECONDAIRE | 1-Lundi | NORMAL | Blandonnet | BLDO01 | 12 | 1 | 64.38 | 12.81 | 2023-03 | b'\x01\x01\x00\x00\x00\x0b\x99+\x83jc\x18@/\x8... |
| 3 | 2023-03-20 | 56 | SECONDAIRE | 1-Lundi | NORMAL | Hôpital de La Tour | HTOU04 | 12 | 1 | 23.16 | 2.07 | 2023-03 | b'\x01\x01\x00\x00\x00\xa8\xc8!\xe2\xe6D\x18@\... |
| 4 | 2023-03-20 | 56 | SECONDAIRE | 1-Lundi | NORMAL | ICC | ICC00 | 12 | 1 | 0.00 | 2.96 | 2023-03 | b'\x01\x01\x00\x00\x00\xe7p\xad\xf6\xb0g\x18@\... |
# Print the column summary (dtypes and memory usage) of the boardings table.
data_montees.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 3847795 entries, 0 to 3847794 Data columns (total 13 columns): # Column Dtype --- ------ ----- 0 date object 1 ligne object 2 ligne_type_act object 3 jour_semaine object 4 horaire_type object 5 arret object 6 arret_code_long object 7 indice_semaine int64 8 indice_jour_semaine int64 9 nb_de_montees float64 10 nb_de_descentes float64 11 mois_annee object 12 coordonnees object dtypes: float64(2), int64(2), object(9) memory usage: 381.6+ MB
# Count the missing values in each column of the boardings table.
data_montees.isna().sum()
date 0 ligne 0 ligne_type_act 0 jour_semaine 0 horaire_type 0 arret 0 arret_code_long 0 indice_semaine 0 indice_jour_semaine 0 nb_de_montees 0 nb_de_descentes 0 mois_annee 0 coordonnees 29 dtype: int64
# Count rows that exactly repeat an earlier row of the boardings table.
data_montees.duplicated(keep="first").sum()
0
# Parse the date column once so range filters compare real timestamps.
data_montees['date'] = pd.to_datetime(data_montees['date'])
# The full span covered by the data is the initial range of the date pickers.
default_start_date, default_end_date = (
    data_montees['date'].min(),
    data_montees['date'].max(),
)
# Date-range pickers pre-filled with the full span of the data
@interact(start_date=widgets.DatePicker(value=default_start_date), end_date=widgets.DatePicker(value=default_end_date))
def update_chart(start_date, end_date):
    """Show a bar chart of total boardings per stop for the selected period.

    Rows of ``data_montees`` dated between *start_date* and *end_date*
    (both inclusive) are summed per ``arret_code_long``, sorted by
    descending boardings, and left-joined with ``data_arrets`` to obtain
    the stop names used on the x axis.

    Args:
        start_date: First day of the period, or ``None`` if the picker
            has been cleared.
        end_date: Last day of the period, or ``None`` if the picker has
            been cleared.
    """
    # A cleared DatePicker passes None, and comparing the datetime column
    # with None raises TypeError; wait until both bounds are set.
    if start_date is None or end_date is None:
        return

    start_date = pd.to_datetime(start_date)
    end_date = pd.to_datetime(end_date)

    in_period = data_montees['date'].between(start_date, end_date)
    totals = (
        data_montees.loc[in_period]
        .groupby(by='arret_code_long')[['nb_de_montees', 'nb_de_descentes']]
        .sum()
        .sort_values(by='nb_de_montees', ascending=False)
        .reset_index()
        .merge(right=data_arrets, how='left', left_on='arret_code_long', right_on='arretcodelong')
    )

    # hover_data is documented as a list of column names; a bare string is
    # only tolerated by some plotly versions.
    fig = px.bar(totals, x="nomarret", y="nb_de_montees", hover_data=["arret_code_long"],
                 width=1200, height=600, orientation='v',
                 title='Total Montees par Arret', labels={'arret_code_long': 'Arret Code Long', 'nomarret':'Nom arrêt', 'nb_de_montees': 'Total Montees'})
    fig.show()
interactive(children=(DatePicker(value=Timestamp('2021-03-01 00:00:00'), description='start_date'), DatePicker…
# Define a function to update the chart (heatmap)
def update_chart(start_date, end_date):
    """Show a density heatmap of total boardings per stop for a period.

    Rows of ``data_montees`` dated between *start_date* and *end_date*
    (both inclusive) are summed per ``arret_code_long`` and left-joined with
    ``data_arrets`` to obtain each stop's coordinates and name.

    Note: this definition rebinds the module-level name ``update_chart``
    that the bar-chart callback defined earlier in the file also uses.

    Args:
        start_date: First day of the period, or ``None``.
        end_date: Last day of the period, or ``None``.
    """
    # Without both bounds the filter and the title's strftime calls would
    # raise, so there is nothing to draw.
    if start_date is None or end_date is None:
        return

    start_date = pd.to_datetime(start_date)
    end_date = pd.to_datetime(end_date)

    in_period = data_montees['date'].between(start_date, end_date)
    totals = (
        data_montees.loc[in_period]
        .groupby(by='arret_code_long')[['nb_de_montees']]
        .sum()
        .sort_values(by='nb_de_montees', ascending=False)
        .reset_index()
        .merge(right=data_arrets, how='left', left_on='arret_code_long', right_on='arretcodelong')
    )

    # hover_data is documented as a list of column names; a bare string is
    # only tolerated by some plotly versions.
    fig = px.density_mapbox(totals, lat='coordonnees_lat', lon='coordonnees_lon', z='nb_de_montees',
                            hover_name='arret_code_long', hover_data=['nomarret'],
                            radius=10, center=dict(lat=46.2044, lon=6.1432), zoom=11,
                            mapbox_style="open-street-map",
                            # mapbox_style="carto-positron",
                            width=900, height=600,
                            title=f"Total Montees par Arret (Heatmap) (du {start_date.strftime('%d/%m/%Y')} au {end_date.strftime('%d/%m/%Y')})",
                            labels={'nb_de_montees': 'Total Montees', 'nomarret': 'Arrêt'})
    fig.show()
# Initial picker range: the earliest and latest dates present in the data.
default_start_date, default_end_date = (
    data_montees['date'].min(),
    data_montees['date'].max(),
)
# Date-range pickers pre-filled with the full span of the data
@interact(start_date=widgets.DatePicker(value=default_start_date), end_date=widgets.DatePicker(value=default_end_date))
def draw_map_and_heatmap(start_date, end_date):
    """Redraw the boardings heatmap whenever either date picker changes.

    Args:
        start_date: First day of the period, or ``None`` if the picker
            has been cleared.
        end_date: Last day of the period, or ``None`` if the picker has
            been cleared.
    """
    # A cleared DatePicker passes None; skip the redraw until both bounds
    # are set instead of letting the heatmap code fail on None.
    if start_date is None or end_date is None:
        return
    # Update the heatmap
    update_chart(pd.to_datetime(start_date), pd.to_datetime(end_date))
interactive(children=(DatePicker(value=Timestamp('2021-03-01 00:00:00'), description='start_date'), DatePicker…